1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 package org.apache.lucene.util.automaton;
31
32 import java.util.ArrayList;
33 import java.util.BitSet;
34 import java.util.HashSet;
35 import java.util.LinkedList;
36
37
38
39
40
41
42 final public class MinimizationOperations {
43
44 private MinimizationOperations() {}
45
46
47
48
49
50
51
52
53 public static Automaton minimize(Automaton a, int maxDeterminizedStates) {
54 if (a.getNumStates() == 0 || (a.isAccept(0) == false && a.getNumTransitions(0) == 0)) {
55
56 return new Automaton();
57 }
58 a = Operations.determinize(a, maxDeterminizedStates);
59
60 if (a.getNumTransitions(0) == 1) {
61 Transition t = new Transition();
62 a.getTransition(0, 0, t);
63 if (t.dest == 0 && t.min == Character.MIN_CODE_POINT
64 && t.max == Character.MAX_CODE_POINT) {
65
66 return a;
67 }
68 }
69 a = Operations.totalize(a);
70
71
72
73 final int[] sigma = a.getStartPoints();
74 final int sigmaLen = sigma.length, statesLen = a.getNumStates();
75
76 @SuppressWarnings({"rawtypes","unchecked"}) final ArrayList<Integer>[][] reverse =
77 (ArrayList<Integer>[][]) new ArrayList[statesLen][sigmaLen];
78 @SuppressWarnings({"rawtypes","unchecked"}) final HashSet<Integer>[] partition =
79 (HashSet<Integer>[]) new HashSet[statesLen];
80 @SuppressWarnings({"rawtypes","unchecked"}) final ArrayList<Integer>[] splitblock =
81 (ArrayList<Integer>[]) new ArrayList[statesLen];
82 final int[] block = new int[statesLen];
83 final StateList[][] active = new StateList[statesLen][sigmaLen];
84 final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
85 final LinkedList<IntPair> pending = new LinkedList<>();
86 final BitSet pending2 = new BitSet(sigmaLen*statesLen);
87 final BitSet split = new BitSet(statesLen),
88 refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
89 for (int q = 0; q < statesLen; q++) {
90 splitblock[q] = new ArrayList<>();
91 partition[q] = new HashSet<>();
92 for (int x = 0; x < sigmaLen; x++) {
93 active[q][x] = new StateList();
94 }
95 }
96
97 for (int q = 0; q < statesLen; q++) {
98 final int j = a.isAccept(q) ? 0 : 1;
99 partition[j].add(q);
100 block[q] = j;
101 for (int x = 0; x < sigmaLen; x++) {
102 final ArrayList<Integer>[] r = reverse[a.step(q, sigma[x])];
103 if (r[x] == null) {
104 r[x] = new ArrayList<>();
105 }
106 r[x].add(q);
107 }
108 }
109
110 for (int j = 0; j <= 1; j++) {
111 for (int x = 0; x < sigmaLen; x++) {
112 for (int q : partition[j]) {
113 if (reverse[q][x] != null) {
114 active2[q][x] = active[j][x].add(q);
115 }
116 }
117 }
118 }
119
120
121 for (int x = 0; x < sigmaLen; x++) {
122 final int j = (active[0][x].size <= active[1][x].size) ? 0 : 1;
123 pending.add(new IntPair(j, x));
124 pending2.set(x*statesLen + j);
125 }
126
127
128 int k = 2;
129
130 while (!pending.isEmpty()) {
131
132 final IntPair ip = pending.removeFirst();
133 final int p = ip.n1;
134 final int x = ip.n2;
135
136 pending2.clear(x*statesLen + p);
137
138 for (StateListNode m = active[p][x].first; m != null; m = m.next) {
139 final ArrayList<Integer> r = reverse[m.q][x];
140 if (r != null) {
141 for (int i : r) {
142 if (!split.get(i)) {
143 split.set(i);
144 final int j = block[i];
145 splitblock[j].add(i);
146 if (!refine2.get(j)) {
147 refine2.set(j);
148 refine.set(j);
149 }
150 }
151 }
152 }
153 }
154
155
156 for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
157 final ArrayList<Integer> sb = splitblock[j];
158 if (sb.size() < partition[j].size()) {
159 final HashSet<Integer> b1 = partition[j];
160 final HashSet<Integer> b2 = partition[k];
161 for (int s : sb) {
162 b1.remove(s);
163 b2.add(s);
164 block[s] = k;
165 for (int c = 0; c < sigmaLen; c++) {
166 final StateListNode sn = active2[s][c];
167 if (sn != null && sn.sl == active[j][c]) {
168 sn.remove();
169 active2[s][c] = active[k][c].add(s);
170 }
171 }
172 }
173
174 for (int c = 0; c < sigmaLen; c++) {
175 final int aj = active[j][c].size,
176 ak = active[k][c].size,
177 ofs = c*statesLen;
178 if (!pending2.get(ofs + j) && 0 < aj && aj <= ak) {
179 pending2.set(ofs + j);
180 pending.add(new IntPair(j, c));
181 } else {
182 pending2.set(ofs + k);
183 pending.add(new IntPair(k, c));
184 }
185 }
186 k++;
187 }
188 refine2.clear(j);
189 for (int s : sb) {
190 split.clear(s);
191 }
192 sb.clear();
193 }
194 refine.clear();
195 }
196
197 Automaton result = new Automaton();
198
199 Transition t = new Transition();
200
201
202
203
204 int[] stateMap = new int[statesLen];
205 int[] stateRep = new int[k];
206
207 result.createState();
208
209
210 for (int n = 0; n < k; n++) {
211
212
213 boolean isInitial = false;
214 for (int q : partition[n]) {
215 if (q == 0) {
216 isInitial = true;
217
218 break;
219 }
220 }
221
222 int newState;
223 if (isInitial) {
224 newState = 0;
225 } else {
226 newState = result.createState();
227 }
228
229
230
231 for (int q : partition[n]) {
232 stateMap[q] = newState;
233
234 result.setAccept(newState, a.isAccept(q));
235 stateRep[newState] = q;
236 }
237 }
238
239
240 for (int n = 0; n < k; n++) {
241 int numTransitions = a.initTransition(stateRep[n], t);
242 for(int i=0;i<numTransitions;i++) {
243 a.getNextTransition(t);
244
245 result.addTransition(n, stateMap[t.dest], t.min, t.max);
246 }
247 }
248 result.finishState();
249
250
251 return Operations.removeDeadStates(result);
252 }
253
254 static final class IntPair {
255
256 final int n1, n2;
257
258 IntPair(int n1, int n2) {
259 this.n1 = n1;
260 this.n2 = n2;
261 }
262 }
263
264 static final class StateList {
265
266 int size;
267
268 StateListNode first, last;
269
270 StateListNode add(int q) {
271 return new StateListNode(q, this);
272 }
273 }
274
275 static final class StateListNode {
276
277 final int q;
278
279 StateListNode next, prev;
280
281 final StateList sl;
282
283 StateListNode(int q, StateList sl) {
284 this.q = q;
285 this.sl = sl;
286 if (sl.size++ == 0) sl.first = sl.last = this;
287 else {
288 sl.last.next = this;
289 prev = sl.last;
290 sl.last = this;
291 }
292 }
293
294 void remove() {
295 sl.size--;
296 if (sl.first == this) sl.first = next;
297 else prev.next = next;
298 if (sl.last == this) sl.last = prev;
299 else next.prev = prev;
300 }
301 }
302 }